##Reading Data

options(max.print=999999999)
options(warn=-1)
library(caret)
library(readstata13)
library(TSstudio)
library(caret)
library(ROCR)
library(pROC)
library(xgboost)
library(gbm)
library(dplyr)
library(gtsummary)
library(xlsx)
library(rJava)
library(ggplot2)
library(segmented)
library(rpart)
library(tidyverse)
library(modelr)
library(dotwhisker)
library(dplyr)
library(ggplot2)
library(ggpubr)
library(broom)
library(lme4)
library(MuMIn)
library(AICcmodavg)
library(psycho)
library(performance)
library(pbkrtest)
library(estimability)
library(emmeans)
library(lsmeans)
library(ggiraph)
library(ggiraphExtra)
require(plyr)


data2<-read.dta13(file = "Y:/Haroon Janjua/Project_Lap_Open_Rob_FL AHCA_Charges/Data_iHernia/Hernia_18_Robotic_Inflation.dta")

# looking at the dimensions of the dataset
dim(data2)
## [1] 8604   24
# looking at any missing values in our dataset
sum(is.na(data2))
## [1] 0
# taking a look at the structure of the varaibles in the dataset
str(data2[1:24])
## 'data.frame':    8604 obs. of  24 variables:
##  $ SYS_RECID          : num  62533124 62612463 62532927 65090208 63313825 ...
##  $ MCARE_NBR          : chr  "100006" "100006" "100006" "100006" ...
##  $ Tot_Open           : num  202 202 202 202 202 202 202 202 202 202 ...
##  $ Tot_Lap            : num  66 66 66 66 66 66 66 66 66 66 ...
##  $ Tot_Rob            : num  11 11 11 11 11 11 11 11 11 11 ...
##  $ Tot_Hernia         : num  279 279 279 279 279 279 279 279 279 279 ...
##  $ TCHGS              : num  46136 26501 25429 42431 34879 ...
##  $ OPRMCHGS           : num  17676 13616 10716 18775 10281 ...
##  $ Non_OPR_CHGS       : num  28460 12885 14713 23656 24598 ...
##  $ COST               : num  7213 4143 3976 6634 5453 ...
##  $ Cost_OPR           : num  2764 2129 1675 2935 1607 ...
##  $ COST_Real          : num  7663 4402 4224 7048 5793 ...
##  $ COST_OPR_Real      : num  2936 2262 1780 3118 1708 ...
##  $ Non_OPR_Cost       : num  4450 2015 2300 3699 3846 ...
##  $ CCRATIO            : num  0.156 0.156 0.156 0.156 0.156 ...
##  $ OPR_PC             : num  0.383 0.514 0.421 0.442 0.295 ...
##  $ OPR_PC_CHG         : num  0.383 0.514 0.421 0.442 0.295 ...
##  $ TOTAL_HOSPITAL_BEDS: int  1454 1454 1454 1454 1454 1454 1454 1454 1454 1454 ...
##  $ VISITS             : int  1222 4755 790 1373 778 4987 4755 1362 778 1373 ...
##  $ TIME               : num  3 3 3 3 3 3 3 3 3 3 ...
##  $ typeofproc2        : chr  "OPEN" "OPEN" "OPEN" "OPEN" ...
##  $ typeofproc         : num  1 1 1 1 1 2 1 1 1 1 ...
##  $ YEAR               : num  2017 2017 2017 2017 2017 ...
##  $ QTR                : int  1 1 1 4 2 2 1 2 2 4 ...
data2[c(3:19)] <- lapply(data2[,c(3:19)] , as.numeric)
data2$typeofproc2 <- as.factor(data2$typeofproc2)
data2$typeofproc <- as.factor(data2$typeofproc)

# taking a look at the structure of the varaibles in the dataset
str(data2[1:24])
## 'data.frame':    8604 obs. of  24 variables:
##  $ SYS_RECID          : num  62533124 62612463 62532927 65090208 63313825 ...
##  $ MCARE_NBR          : chr  "100006" "100006" "100006" "100006" ...
##  $ Tot_Open           : num  202 202 202 202 202 202 202 202 202 202 ...
##  $ Tot_Lap            : num  66 66 66 66 66 66 66 66 66 66 ...
##  $ Tot_Rob            : num  11 11 11 11 11 11 11 11 11 11 ...
##  $ Tot_Hernia         : num  279 279 279 279 279 279 279 279 279 279 ...
##  $ TCHGS              : num  46136 26501 25429 42431 34879 ...
##  $ OPRMCHGS           : num  17676 13616 10716 18775 10281 ...
##  $ Non_OPR_CHGS       : num  28460 12885 14713 23656 24598 ...
##  $ COST               : num  7213 4143 3976 6634 5453 ...
##  $ Cost_OPR           : num  2764 2129 1675 2935 1607 ...
##  $ COST_Real          : num  7663 4402 4224 7048 5793 ...
##  $ COST_OPR_Real      : num  2936 2262 1780 3118 1708 ...
##  $ Non_OPR_Cost       : num  4450 2015 2300 3699 3846 ...
##  $ CCRATIO            : num  0.156 0.156 0.156 0.156 0.156 ...
##  $ OPR_PC             : num  0.383 0.514 0.421 0.442 0.295 ...
##  $ OPR_PC_CHG         : num  0.383 0.514 0.421 0.442 0.295 ...
##  $ TOTAL_HOSPITAL_BEDS: num  1454 1454 1454 1454 1454 ...
##  $ VISITS             : num  1222 4755 790 1373 778 ...
##  $ TIME               : num  3 3 3 3 3 3 3 3 3 3 ...
##  $ typeofproc2        : Factor w/ 3 levels "LAP","OPEN","ROBOTIC": 2 2 2 2 2 1 2 2 2 2 ...
##  $ typeofproc         : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 2 1 1 1 1 ...
##  $ YEAR               : num  2017 2017 2017 2017 2017 ...
##  $ QTR                : int  1 1 1 4 2 2 1 2 2 4 ...

#Linear Regression Models

# fit LM for Open 
set.seed(5842)
model.open <- lm(COST_Real ~ TIME , data = data2, subset = typeofproc2 == "OPEN")
summary(model.open)
## 
## Call:
## lm(formula = COST_Real ~ TIME, data = data2, subset = typeofproc2 == 
##     "OPEN")
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3869.4  -816.9  -130.8   521.5 10661.4 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3783.09      47.92   78.95   <2e-16 ***
## TIME          179.57      13.75   13.06   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1278 on 3913 degrees of freedom
## Multiple R-squared:  0.04179,    Adjusted R-squared:  0.04155 
## F-statistic: 170.7 on 1 and 3913 DF,  p-value: < 2.2e-16
# fit LM for Lap
set.seed(5842)
model.lap <- lm(COST_Real ~ TIME , data = data2, subset = typeofproc2 == "LAP")
summary(model.lap)
## 
## Call:
## lm(formula = COST_Real ~ TIME, data = data2, subset = typeofproc2 == 
##     "LAP")
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5093.3 -1041.4  -312.3   625.2 11916.6 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5155.21      97.47  52.892   <2e-16 ***
## TIME          240.92      25.26   9.539   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1710 on 1784 degrees of freedom
## Multiple R-squared:  0.04853,    Adjusted R-squared:  0.048 
## F-statistic: 90.99 on 1 and 1784 DF,  p-value: < 2.2e-16
# fit LM for Rob
set.seed(5842)
model.rob <- lm(COST_Real ~ TIME , data = data2, subset = typeofproc2 == "ROBOTIC")
summary(model.rob)
## 
## Call:
## lm(formula = COST_Real ~ TIME, data = data2, subset = typeofproc2 == 
##     "ROBOTIC")
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4797.3 -1325.0  -314.6  1004.6 12400.1 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5876.84     126.14   46.59   <2e-16 ***
## TIME          357.76      30.08   11.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2091 on 2901 degrees of freedom
## Multiple R-squared:  0.04651,    Adjusted R-squared:  0.04618 
## F-statistic: 141.5 on 1 and 2901 DF,  p-value: < 2.2e-16
model.open$coefficients
## (Intercept)        TIME 
##   3783.0915    179.5722
model.lap$coefficients
## (Intercept)        TIME 
##   5155.2086    240.9209
model.rob$coefficients
## (Intercept)        TIME 
##   5876.8365    357.7567
#Analysis of Variance
set.seed(5842)
m.interaction <- lm(COST_Real ~ TIME*typeofproc2, data = data2)
anova(m.interaction)
## Analysis of Variance Table
## 
## Response: COST_Real
##                    Df     Sum Sq    Mean Sq  F value    Pr(>F)    
## TIME                1 3.7662e+09 3766197301 1332.530 < 2.2e-16 ***
## typeofproc2         2 1.2076e+10 6037832136 2136.264 < 2.2e-16 ***
## TIME:typeofproc2    2 9.8491e+07   49245601   17.424 2.807e-08 ***
## Residuals        8598 2.4301e+10    2826352                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m.lst <- lstrends(m.interaction, "typeofproc2", var="TIME") ; m.lst
##  typeofproc2 TIME.trend   SE   df lower.CL upper.CL
##  LAP                241 24.8 8598      192      290
##  OPEN               180 18.1 8598      144      215
##  ROBOTIC            358 24.2 8598      310      405
## 
## Confidence level used: 0.95
pairs(m.lst)
##  contrast       estimate   SE   df t.ratio p.value
##  LAP - OPEN         61.3 30.7 8598   1.998  0.1128
##  LAP - ROBOTIC    -116.8 34.7 8598  -3.372  0.0022
##  OPEN - ROBOTIC   -178.2 30.2 8598  -5.903  <.0001
## 
## P value adjustment: tukey method for comparing a family of 3 estimates

#Interactive Linear Model Plots

ggPredict(model.open,se=TRUE,interactive=TRUE)
ggPredict(model.lap,se=TRUE,interactive=TRUE)
ggPredict(model.rob,se=TRUE,interactive=TRUE)

#Non Interactive Combined Linear Model Plots

data2 %>%
  ggplot(aes(x=TIME, 
             y=COST_Real, 
             color=typeofproc2))+
  geom_point()+
  geom_smooth(method="lm")

data2 %>%
  ggplot(aes(x=TIME, 
             y=COST_Real, 
             color=typeofproc2))+
    geom_smooth(method="lm")

#Year by Year Linear Reg Models

data3<-read.dta13(file = "Y:/Haroon Janjua/Project_Lap_Open_Rob_FL AHCA_Charges/Data_iHernia/Hernia_18_Robotic_YEARLY_Inflation.dta")

# looking at the dimensions of the dataset
dim(data3)
## [1] 11266    15
# looking at any missing values in our dataset
sum(is.na(data3))
## [1] 45064
# taking a look at the structure of the varaibles in the dataset
str(data3[1:15])
## 'data.frame':    11266 obs. of  15 variables:
##  $ SYS_RECID    : num  65308574 64540880 62533219 64539131 63274186 ...
##  $ COST         : num  13124 10914 13713 7286 9474 ...
##  $ Cost_OPR     : num  7312 6531 7810 2809 4804 ...
##  $ COST_Real    : num  13942 11595 14569 7740 10065 ...
##  $ COST_OPR_Real: num  7768 6939 8297 2984 5103 ...
##  $ Non_OPR_Cost : num  5812 4383 5904 4477 4670 ...
##  $ TIME         : num  3 3 3 3 3 3 3 3 3 3 ...
##  $ typeofproc2  : chr  "ROBOTIC" "ROBOTIC" "ROBOTIC" "ROBOTIC" ...
##  $ typeofproc   : num  3 3 3 3 3 3 3 3 3 3 ...
##  $ YEAR         : num  2017 2017 2017 2017 2017 ...
##  $ typeofproc3  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ typeofproc4  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ typeofproc5  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ typeofproc6  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ typeproc2    : chr  "ROB_15-20" "ROB_15-20" "ROB_15-20" "ROB_15-20" ...
data3[c(2:6)] <- lapply(data3[,c(2:6)] , as.numeric)
data3$typeofproc2 <- as.factor(data3$typeofproc2)
data3$typeofproc <- as.factor(data3$typeofproc)
data3$typeproc2 <- as.factor(data3$typeproc2)

# taking a look at the structure of the varaibles in the dataset
str(data3[1:15])
## 'data.frame':    11266 obs. of  15 variables:
##  $ SYS_RECID    : num  65308574 64540880 62533219 64539131 63274186 ...
##  $ COST         : num  13124 10914 13713 7286 9474 ...
##  $ Cost_OPR     : num  7312 6531 7810 2809 4804 ...
##  $ COST_Real    : num  13942 11595 14569 7740 10065 ...
##  $ COST_OPR_Real: num  7768 6939 8297 2984 5103 ...
##  $ Non_OPR_Cost : num  5812 4383 5904 4477 4670 ...
##  $ TIME         : num  3 3 3 3 3 3 3 3 3 3 ...
##  $ typeofproc2  : Factor w/ 2 levels "","ROBOTIC": 2 2 2 2 2 2 2 2 2 2 ...
##  $ typeofproc   : Factor w/ 1 level "3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ YEAR         : num  2017 2017 2017 2017 2017 ...
##  $ typeofproc3  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ typeofproc4  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ typeofproc5  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ typeofproc6  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ typeproc2    : Factor w/ 5 levels "ROB_15-20","ROB_16-20",..: 1 1 1 1 1 1 1 1 1 1 ...
# fit LM for Rob 2015-2020
set.seed(5877)
model.rob <- lm(COST_Real ~ TIME , data = data3, subset = typeproc2 == "ROB_15-20")
summary(model.rob)
## 
## Call:
## lm(formula = COST_Real ~ TIME, data = data3, subset = typeproc2 == 
##     "ROB_15-20")
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4797.3 -1325.0  -314.6  1004.6 12400.1 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5876.84     126.14   46.59   <2e-16 ***
## TIME          357.76      30.08   11.89   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2091 on 2901 degrees of freedom
## Multiple R-squared:  0.04651,    Adjusted R-squared:  0.04618 
## F-statistic: 141.5 on 1 and 2901 DF,  p-value: < 2.2e-16
# fit LM for Rob 2016-2020
set.seed(5877)
model.rob16 <- lm(COST_Real ~ TIME , data = data3, subset = typeproc2 == "ROB_16-20")
summary(model.rob16)
## 
## Call:
## lm(formula = COST_Real ~ TIME, data = data3, subset = typeproc2 == 
##     "ROB_16-20")
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4825.3 -1302.2  -303.6  1014.3 10728.3 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5595.57     134.69   41.55   <2e-16 ***
## TIME          419.61      31.77   13.21   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2066 on 2835 degrees of freedom
## Multiple R-squared:  0.05798,    Adjusted R-squared:  0.05764 
## F-statistic: 174.5 on 1 and 2835 DF,  p-value: < 2.2e-16
# fit LM for Rob 2017-2020
set.seed(5877)
model.rob17 <- lm(COST_Real ~ TIME ,data = data3, subset = typeproc2 == "ROB_17-20")
summary(model.rob17)
## 
## Call:
## lm(formula = COST_Real ~ TIME, data = data3, subset = typeproc2 == 
##     "ROB_17-20")
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4852.9 -1322.1  -331.2  1009.6 10828.6 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4983.64     192.14   25.94   <2e-16 ***
## TIME          547.52      42.91   12.76   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2123 on 2469 degrees of freedom
## Multiple R-squared:  0.06186,    Adjusted R-squared:  0.06148 
## F-statistic: 162.8 on 1 and 2469 DF,  p-value: < 2.2e-16
# fit LM for Rob 2018-2020
set.seed(5877)
model.rob18 <- lm(COST_Real ~ TIME , data = data3, subset = typeproc2 == "ROB_18-20")
summary(model.rob18)
## 
## Call:
## lm(formula = COST_Real ~ TIME, data = data3, subset = typeproc2 == 
##     "ROB_18-20")
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4839.9 -1592.2  -276.4  1112.0 10985.4 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4252.12     365.51  11.633   <2e-16 ***
## TIME          691.21      75.03   9.212   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2264 on 1851 degrees of freedom
## Multiple R-squared:  0.04384,    Adjusted R-squared:  0.04332 
## F-statistic: 84.87 on 1 and 1851 DF,  p-value: < 2.2e-16
# fit LM for Rob 2019-2020
set.seed(5877)
model.rob19 <- lm(COST_Real ~ TIME ,data = data3, subset = typeproc2 == "ROB_19-20")
summary(model.rob19)
## 
## Call:
## lm(formula = COST_Real ~ TIME, data = data3, subset = typeproc2 == 
##     "ROB_19-20")
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4783.8 -1475.4  -293.8  1254.7  9424.9 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -362.8      777.8  -0.467    0.641    
## TIME          1542.8      147.2  10.481   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2253 on 1200 degrees of freedom
## Multiple R-squared:  0.08386,    Adjusted R-squared:  0.0831 
## F-statistic: 109.8 on 1 and 1200 DF,  p-value: < 2.2e-16
model.rob$coefficients
## (Intercept)        TIME 
##   5876.8365    357.7567
model.rob16$coefficients
## (Intercept)        TIME 
##   5595.5720    419.6061
model.rob17$coefficients
## (Intercept)        TIME 
##   4983.6387    547.5162
model.rob18$coefficients
## (Intercept)        TIME 
##   4252.1153    691.2144
model.rob19$coefficients
## (Intercept)        TIME 
##   -362.8423   1542.7673
#Analysis of Variance
m.interaction2 <- lm(COST_Real ~ TIME*typeproc2, data = data3)
anova(m.interaction2)
## Analysis of Variance Table
## 
## Response: COST_Real
##                   Df     Sum Sq    Mean Sq  F value Pr(>F)    
## TIME               1 2.9412e+09 2941172815 642.8818 <2e-16 ***
## typeproc2          4 2.3397e+07    5849365   1.2786 0.2758    
## TIME:typeproc2     4 3.9959e+08   99896355  21.8354 <2e-16 ***
## Residuals      11256 5.1496e+10    4574982                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m.interaction2$coefficients
##             (Intercept)                    TIME      typeproc2ROB_16-20 
##               5876.8365                357.7567               -281.2645 
##      typeproc2ROB_17-20      typeproc2ROB_18-20      typeproc2ROB_19-20 
##               -893.1978              -1624.7212              -6239.6788 
## TIME:typeproc2ROB_16-20 TIME:typeproc2ROB_17-20 TIME:typeproc2ROB_18-20 
##                 61.8494                189.7595                333.4577 
## TIME:typeproc2ROB_19-20 
##               1185.0106
m.lst2 <- lstrends(m.interaction2, "typeproc2", var="TIME") ; m.lst2
##  typeproc2 TIME.trend    SE    df lower.CL upper.CL
##  ROB_15-20        358  30.8 11256      297      418
##  ROB_16-20        420  32.9 11256      355      484
##  ROB_17-20        548  43.2 11256      463      632
##  ROB_18-20        691  70.9 11256      552      830
##  ROB_19-20       1543 139.7 11256     1269     1817
## 
## Confidence level used: 0.95
pairs(m.lst2)
##  contrast                  estimate    SE    df t.ratio p.value
##  (ROB_15-20) - (ROB_16-20)    -61.8  45.0 11256  -1.374  0.6447
##  (ROB_15-20) - (ROB_17-20)   -189.8  53.1 11256  -3.576  0.0032
##  (ROB_15-20) - (ROB_18-20)   -333.5  77.3 11256  -4.315  0.0002
##  (ROB_15-20) - (ROB_19-20)  -1185.0 143.1 11256  -8.283  <.0001
##  (ROB_16-20) - (ROB_17-20)   -127.9  54.3 11256  -2.355  0.1281
##  (ROB_16-20) - (ROB_18-20)   -271.6  78.1 11256  -3.476  0.0046
##  (ROB_16-20) - (ROB_19-20)  -1123.2 143.5 11256  -7.825  <.0001
##  (ROB_17-20) - (ROB_18-20)   -143.7  83.0 11256  -1.731  0.4150
##  (ROB_17-20) - (ROB_19-20)   -995.3 146.3 11256  -6.805  <.0001
##  (ROB_18-20) - (ROB_19-20)   -851.6 156.7 11256  -5.435  <.0001
## 
## P value adjustment: tukey method for comparing a family of 5 estimates
set.seed(5877)
data3 %>%
  ggplot(aes(x=TIME, 
             y=COST_Real, 
             color=typeproc2))+
  geom_point()+
  geom_smooth(method="lm")

set.seed(5877)
data3 %>%
  ggplot(aes(x=TIME, 
             y=COST_Real, 
             color=typeproc2))+
  geom_smooth(method="lm")

#Linear Model with more variables

# Linear Regression Model of Cost 
lm.Cost <- lm(COST_Real ~ Tot_Open +Tot_Lap+ Tot_Rob + TOTAL_HOSPITAL_BEDS + CCRATIO,
             data = data2, subset = typeofproc2 == "ROBOTIC")
summary(lm.Cost) 
## 
## Call:
## lm(formula = COST_Real ~ Tot_Open + Tot_Lap + Tot_Rob + TOTAL_HOSPITAL_BEDS + 
##     CCRATIO, data = data2, subset = typeofproc2 == "ROBOTIC")
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5032.8 -1349.7  -279.5  1001.6 12222.3 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         5750.6313   161.5542  35.596  < 2e-16 ***
## Tot_Open              -9.9168     1.9254  -5.150 2.77e-07 ***
## Tot_Lap               12.5956     2.9187   4.315 1.65e-05 ***
## Tot_Rob               17.3132     1.6763  10.328  < 2e-16 ***
## TOTAL_HOSPITAL_BEDS    1.9109     0.2151   8.883  < 2e-16 ***
## CCRATIO             1090.6154  1454.4052   0.750    0.453    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2018 on 2897 degrees of freedom
## Multiple R-squared:  0.1136, Adjusted R-squared:  0.1121 
## F-statistic: 74.29 on 5 and 2897 DF,  p-value: < 2.2e-16
# Linear Regression Model of Operating Cost 

lm.CostOpr <- lm(COST_OPR_Real ~ Tot_Open +Tot_Lap+ Tot_Rob + TOTAL_HOSPITAL_BEDS + CCRATIO,
             data = data2, subset = typeofproc2 == "ROBOTIC")
summary(lm.CostOpr) 
## 
## Call:
## lm(formula = COST_OPR_Real ~ Tot_Open + Tot_Lap + Tot_Rob + TOTAL_HOSPITAL_BEDS + 
##     CCRATIO, data = data2, subset = typeofproc2 == "ROBOTIC")
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3845.9 -1311.5  -304.5  1141.0 12125.2 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         4587.5930   151.1116  30.359  < 2e-16 ***
## Tot_Open             -21.3015     1.8010 -11.828  < 2e-16 ***
## Tot_Lap                0.3128     2.7301   0.115    0.909    
## Tot_Rob                8.7389     1.5679   5.573 2.73e-08 ***
## TOTAL_HOSPITAL_BEDS    1.5105     0.2012   7.507 8.03e-14 ***
## CCRATIO               46.8899  1360.3951   0.034    0.973    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1887 on 2897 degrees of freedom
## Multiple R-squared:  0.08344,    Adjusted R-squared:  0.08186 
## F-statistic: 52.75 on 5 and 2897 DF,  p-value: < 2.2e-16